#
# python code for building a parser from a grammar
#  Copyright Aaron Robert Watters, 1994
#

# BUGS:
#  A bad grammar that has no derivations for
#  the root nonterminal may cause a name error
#  on the variable "GoodStartingPlace"

# this needs to be modified so the RULEGRAM is loaded from a
# compiled representation if available.

import string
import kjSet
import kjParser

# import some constants
from kjParser import \
 TERMFLAG, NOMATCHFLAG, MOVETOFLAG, REDUCEFLAG, TRANSFLAG, KEYFLAG, \
 NONTERMFLAG, TERMFLAG, EOFFLAG, ENDOFFILETOKEN

# reference to kjParser's module-name constant; the use is not visible in
# this file -- presumably needed by generated/compiled parser code that
# must refer back to kjParser (TODO confirm against kjParser)
PMODULE = kjParser.THISMODULE

# errors raised here (old-style string exceptions, caught by name)
TokenError = "TokenError" # may happen on autogen with bad grammar
NotSLRError = "NotSLRError" # may happen for nonSLR grammar

# set this flag for regression testing at each load
RUNTESTS = 0
# set this flag to abort automatic generation on Errors
ABORTONERROR = 0

# token used to mark null (epsilon) productions; deliberately not a
# valid (kindflag, name) token pair
NULLTOKEN = (None,None)


# a derived FSM class, with closure computation methods defined
# (compilable FSMachine)
#
class CFSMachine(kjParser.FSMachine):
    """FSMachine subclass adding epsilon-closure computation.

    Used to convert the SLR NFA built from a ruleset into a DFA whose
    states are sets of NFA states.
    """

    def __init__(self, nonterm):
        # all initialization is delegated to the base FSMachine
        kjParser.FSMachine.__init__(self, nonterm)

    # return the epsilon closure of the FSM as a new FSM
    #
    # DoNullMap, if set, will map unexpected tokens to
    # the "empty" state (usually creating a really big fsm)
    #
    def Eclosure(self, Epsilon, DoNullMaps=0):
        """Return a new CFSMachine that is the epsilon-closure of self.

        Epsilon -- the token to treat as the empty transition.
        DoNullMaps -- if true, record transitions even to the empty
          state set (usually creating a much larger machine).

        Each state s of the result is marked with the set of self's
        states it represents, stored in Closure.States[s][1].
        """
        Closure = CFSMachine( self.root_nonTerminal )

        # compute the Epsilon Graph between states
        EGraph = kjSet.NewDG([])
        for State in range(0,self.maxState+1):
            # every state is E-connected to self
            kjSet.AddArc( EGraph, State, State )
            # add possible transition on epsilon (ONLY ONE SUPPORTED!)
            key = (State, Epsilon)
            if self.StateTokenMap.has_key(key):
                keymap = self.StateTokenMap[key]
                if keymap[0][0] != MOVETOFLAG:
                    raise TypeError, "unexpected map type in StateTokenMap"
                for (Flag,ToState) in keymap:
                    kjSet.AddArc( EGraph, State, ToState )
        #endfor
        # transitively close EGraph
        kjSet.TransClose( EGraph )

        # Translate EGraph into a dictionary of lists
        # (EMap[s] = list of states reachable from s via epsilon)
        EMap = {}
        for State in range(0,self.maxState+1):
            EMap[State] = kjSet.Neighbors( EGraph, State )

        # make each e-closure of each self.state a state of the closure FSM.
        # here closure states assumed transient -- reset elsewhere.
        # first do the initial state
        Closure.States[ Closure.initial_state ] = \
           [TRANSFLAG, kjSet.NewSet(EMap[self.initial_state]) ]
        # do all other states (save initial and successful final states)
        #for State in range(0,self.maxState+1):
        #   if State != self.initial_state \
        #    and State != self.successful_final_state:
        #      Closure.NewSetState(TRANSFLAG, kjSet.NewSet(EMap[State]) )
        ##endfor

        # compute set of all known tokens EXCEPT EPSILON
        Tokens = kjSet.NewSet( [] )
        for (State, Token) in self.StateTokenMap.keys():
            if Token != Epsilon:
                kjSet.addMember(Token, Tokens)
        # transform it into a list
        Tokens = kjSet.get_elts(Tokens)

        # for each state of the the closure FSM (past final) add transitions
        # and add new states as needed until all states are processed
        # (uses convention that states are allocated sequentially,
        # so the while loop also visits states created inside it)
        ThisClosureState = 1
        while ThisClosureState <= Closure.maxState:
            MemberStates = kjSet.get_elts(Closure.States[ThisClosureState][1])
            # for each possible Token, compute the union UTrans of all
            # e-closures for all transitions for all member states,
            # on the Token, make  UTrans a new state (if needed),
            # and transition ThisClosureState to UTrans on Token
            for Token in Tokens:
                UTrans = kjSet.NewSet( [] )
                for MState in MemberStates:
                    # if MState has a transition on Token, include
                    # EMap for the destination state
                    key = (MState, Token)
                    if self.StateTokenMap.has_key(key):
                        DStateTup = self.StateTokenMap[key]
                        if DStateTup[0][0] != MOVETOFLAG:
                            raise TypeError, "unknown map type"
                        for (DFlag, DState) in DStateTup:
                            for EDState in EMap[DState]:
                                kjSet.addMember(EDState, UTrans)
                    #endif
                #endfor MState
                # register UTrans as a new state if needed
                UTState = Closure.NewSetState(TRANSFLAG, UTrans)
                # record transition from
                # ThisClosureState to UTState on Token
                if DoNullMaps:
                    Closure.SetMap( ThisClosureState, Token, UTState)
                else:
                    if not kjSet.Empty(UTrans):
                        Closure.SetMap( ThisClosureState, Token, UTState)
            #endfor Token
            ThisClosureState = ThisClosureState +1
        #endwhile
        return Closure
    #enddef Eclosure

    # add an set-marked state to self if not present
    # uses self.States[s][1] as the set marking the state s
    #
    # only used by Eclosure above
    #
    def NewSetState(self, kind, InSet):
        """Return the state of self marked with the set InSet,
        allocating a new state of the given kind if no existing
        state carries an equal set."""
        # return existing state if one is present that matches the set
        LastState= self.maxState
        # skip state 0 (successful final state)???
        for State in range(1,LastState+1):
            MarkSet = self.States[State][1]
            if kjSet.Same(InSet,MarkSet):
                return State  # nonlocal
        #endfor
        # if not exited then allocate a new state
        LastState = LastState + 1
        self.States[LastState] = [ kind , InSet ]
        self.maxState = LastState
        return LastState
    #enddef newSetState

#endclass CFSMachine


# Ruleset class, used to compute NFA and then DFA for
#  parsing based on a list of rules.
#
class ruleset:

    def __init__(self, StartNonterm, Rulelist):
        # initialize the ruleset
        self.StartNonterm = StartNonterm
        self.Rules = Rulelist

    # method to compute prefixes and First sets for nonterminals
    def CompFirst(self):
        # uses the special null production token NULLTOKEN
        # snarfed directly from Aho+Ullman (terminals glossed)
        First = kjSet.NewDG( [] )
        # repeat the while loop until no change is made to First
        done = 0
        while not done:
            done = 1 # assume we're done until a change is made to First

            # iterate through all rules looking for a new arc to add
            # indicating Terminal > possible first token derivation
            #
            for R in self.Rules:
                GoalNonterm = R.Nonterm
                Bodylength = len(R.Body)
                # look through the body of the rule up to the token with
                # no epsilon production (yet seen)
                Bodyindex = 0
                Processindex = 1
                while Processindex:
                    # unless otherwise indicated below, don't go to next token
                    Processindex = 0

                    # if index is past end of body then record
                    # an epsilon production for this nonterminal
                    if Bodyindex >= Bodylength:
                        if not kjSet.HasArc(First, GoalNonterm, NULLTOKEN ):
                            kjSet.AddArc( First, GoalNonterm, NULLTOKEN )
                            done = 0 # change made to First
                    else:
                        # otherwise try to add firsts of this token
                        # to firsts of the Head of the rule.
                        Token = R.Body[Bodyindex]
                        (type, name) = Token
                        if type in (KEYFLAG,TERMFLAG):
                            # try to add this terminal to First for GoalNonterm
                            if not kjSet.HasArc(First, GoalNonterm, Token):
                                kjSet.AddArc( First, GoalNonterm, Token)
                                done = 0
                        elif type == NONTERMFLAG:
                            # try to add each First entry for nonterminal
                            # to First entry for GoalNonterm
                            for FToken in kjSet.Neighbors( First, Token ):
                                if not kjSet.HasArc(First, GoalNonterm, FToken):
                                    kjSet.AddArc( First, GoalNonterm, FToken)
                                    done = 0
                            # does this nonterminal have a known e production?
                            if kjSet.HasArc( First, Token, NULLTOKEN ):
                                # if so, process next token in rule
                                Processindex = 1
                        else:
                            raise TokenError, "unknown token type in rule body"
                    #endif
                    Bodyindex = Bodyindex + 1
                #endwhile Processindex
            #endfor R in self.Rules
        #endwhile not done
        self.First = First
    #enddef CompFirst

    # computing the Follow set for the ruleset
    # the good news: I think it's correct.
    # the bad news: It's slower than it needs to be for epsilon cases.
    def CompFollow(self):
        Follow = kjSet.NewDG( [] )

        # put end marker on follow of start nonterminal
        kjSet.AddArc(Follow, self.StartNonterm, kjParser.ENDOFFILETOKEN)

        # now compute other follows using the rules;
        # repeat the loop until no change to Follow.
        done = 0
        while not done:
            done = 1 # assume done unless Follow changes
            for R in self.Rules:
                #print R
                # work backwards in the rule body to
                # avoid retesting for epsilon nonterminals
                Bodylength = len(R.Body)
                EpsilonTail = 1 # the tail of rule may expand to null
                BodyIndex = Bodylength - 1
                Last = 1 # loop starts at the last
                from types import TupleType
                while BodyIndex >= 0:
                    Token = R.Body[BodyIndex]
                    (Ttype,Tname) = Token
                    if Ttype in (KEYFLAG,TERMFLAG):
                        # keywords etc cancel epsilon tail, otherwise ignore
                        EpsilonTail = 0
                    elif Ttype == NONTERMFLAG:
                        # if the tail expands to epsilon, map
                        # follow for the goal nonterminal to this token
                        # and also follow for the tail nonterms
                        if EpsilonTail:
                            # add follow for goal
                            for FToken in kjSet.Neighbors(Follow,R.Nonterm):
                                if not kjSet.HasArc(Follow,Token,FToken):
                                    kjSet.AddArc(Follow,Token,FToken)
                                    #if type(FToken[0])==TupleType:
                                    #   raise ValueError, "bad FToken"+`FToken`
                                    #print "new", Token, FToken
                                    done = 0 # follow changed, loop again
                            # add follow for tail members
                            #for Index2 in range(BodyIndex+1, Bodylength):
                            #   TailToken = R.Body[Index2]
                            #   for FToken in kjSet.Neighbors(Follow,TailToken):
                            #       if not kjSet.HasArc(Follow,Token,FToken):
                            #          kjSet.AddArc(Follow,Token,FToken)
                            #          done = 0
                        #endif EpsilonTail

                        # if we are not at the end use First set for next token
                        if not Last:
                            NextToken = R.Body[BodyIndex+1]
                            (NTtype, NTname) = NextToken
                            if NTtype in (KEYFLAG,TERMFLAG):
                                if not kjSet.HasArc(Follow,Token,NextToken):
                                    kjSet.AddArc(Follow,Token,NextToken)
                                    #print "next", Token, NextToken
                                    done = 0
                            elif NTtype == NONTERMFLAG:
                                for FToken in kjSet.Neighbors(self.First, NextToken):
                                    if FToken != NULLTOKEN:
                                        if not kjSet.HasArc(Follow,Token,FToken):
                                            kjSet.AddArc(Follow,Token,FToken)
                                            #print "neighbor", Token, FToken
                                            done = 0
                                    else:
                                        # next token expands to epsilon:
                                        # add its follow, unless already done above
                                        #if not EpsilonTail:
                                        for FToken in kjSet.Neighbors(Follow,NextToken):
                                            if not kjSet.HasArc(Follow,Token,FToken):
                                                kjSet.AddArc(Follow,Token,FToken)
                                                #print "epsilon", Token, FToken
                                                done = 0
                            else:
                                raise TokenError, "unknown token type in rule body"
                        #endif not Last

                        # finally, check whether next iteration has epsilon tail
                        if not kjSet.HasArc(self.First, Token, NULLTOKEN):
                            EpsilonTail = 0
                    else:
                        raise TokenError, "unknown token type in rule body"

                    BodyIndex = BodyIndex - 1
                    Last = 0 # no longer at the last token of the rule
                #endwhile
            #endfor
        #endwhile
        self.Follow = Follow
    #enddef CompFollow

    def DumpFirstFollow(self):
        First = self.First
        Follow = self.Follow
        print "First:"
        for key in First.keys():
            name = key[1]
            print name," :: ",
            for (flag2,name2) in First[key].keys():
                print name2,", ",
            print
        print "Follow:"
        for key in Follow.keys():
            name = key[1]
            print name," :: ",
            for (flag2,name2) in Follow[key].keys():
                print name2,", ",
            print

    # computing the "first" of the tail of a rule followed by an
    #  optional terminal
    # doesn't include NULLTOKEN
    # requires self.First to be computed
    #
    def FirstOfTail(self, Rule, TailIndex, Token=None):
        Result = kjSet.NewSet( [] )
        # go through all tokens in rule tail so long as there is a
        #  null derivation for the remainder
        Nullprefix = 1
        BodyLength = len(Rule.Body)
        ThisIndex = TailIndex
        while Nullprefix and ThisIndex < BodyLength:
            RToken = Rule.Body[ThisIndex]
            (RTtype, RTname) = RToken
            if RTtype == NONTERMFLAG:
                for FToken in kjSet.Neighbors(self.First, RToken):
                    if FToken != NULLTOKEN:
                        kjSet.addMember(FToken, Result)
                #endfor
                # check whether this symbol might have a null production
                if not kjSet.HasArc(self.First, RToken, NULLTOKEN):
                    Nullprefix = 0
            elif RTtype in [KEYFLAG, TERMFLAG]:
                kjSet.addMember(RToken, Result)
                Nullprefix = 0
            else:
                raise TokenError, "unknown token type in rule body"
            ThisIndex = ThisIndex + 1
        #endwhile
        # add the optional token if given and Nullprefix still set
        if Nullprefix and Token != None:
            kjSet.addMember(Token, Result)
        return Result
    #enddef FirstOfTail

    # compute an SLR NFA for the ruleset with states for each SLR "item"
    # and transitions, eg:
    #     X > .AB
    #   on A maps to X > A.B
    #   on epsilon maps to A > .ZC
    #                  and A > .WK
    # an item is a pair (rulenumber, bodyposition)
    # where body position 0 is interpreted to point before the
    # beginning of the body.
    #
    # SLR = "simple LR" in Aho+Ullman terminology
    #
    def CompSLRNFA(self):
        NFA = CFSMachine(self.StartNonterm)
        Nrules = len(self.Rules)
        itemStateMap = {}
        for Ruleindex in range(0,Nrules):
            Rule = self.Rules[Ruleindex]
            # make an item for each "dot" position in the body
            for DotPos in range(0, len(Rule.Body) + 1):
                item = (Ruleindex, DotPos)
                itemState = NFA.NewState(TRANSFLAG, [item])
                itemStateMap[item] = itemState
            #endfor DotPos
        #endfor Ruleindex

        # now that the states are initialized
        # compute transitions except for the last item of a rule
        # (which has none)
        for Ruleindex in range(0,Nrules):
            Rule = self.Rules[Ruleindex]
            for DotPos in range(0, len(Rule.Body)):
                item = (Ruleindex, DotPos)
                CurrentToken = Rule.Body[DotPos]
                ThisState = itemStateMap[item]
                NextState = itemStateMap[ (Ruleindex, DotPos + 1) ]
                NFA.SetMap( ThisState, CurrentToken, NextState  )
                # if the current token is a nonterminal
                # ad epsilon transitions to first item for any
                # rule that derives this nonterminal
                (CTtype, CTname) = CurrentToken
                if CTtype == NONTERMFLAG:
                    for Rule2index in range(0,Nrules):
                        Rule2 = self.Rules[Rule2index]
                        Head = Rule2.Nonterm
                        if Head == CurrentToken:
                            NextState = itemStateMap[( Rule2index, 0 )]
                            NFA.SetMap( ThisState, NULLTOKEN, NextState )
                    #endfor Rule2index
                #endif CTtype == NONTERMFLAG
            #endfor DotPos
        #endfor Ruleindex

        # must handle the initial state properly here!
        # Make a dummy state with e-transitions to all first items
        # for rules that derive the initial nonterminal
        ThisState = NFA.initial_state
        GoodStartingPlace = None
        for Ruleindex in range(0,Nrules):
            Rule = self.Rules[Ruleindex]
            Head = Rule.Nonterm
            if Head == self.StartNonterm:
                GoodStartingPlace= (Ruleindex, 0)
                NextState = itemStateMap[ GoodStartingPlace ]
                NFA.SetMap( ThisState, NULLTOKEN, NextState )
        # fix the NFA.States entry
        if GoodStartingPlace == None:
            raise NotSLRError, "No derivation for root nonterminal."
        NFA.States[ NFA.initial_state ] = \
             [ 'transient', GoodStartingPlace ]

        self.SLRNFA = NFA
    #enddef CompSLRNFA

    # dump an item
    def ItemDump(self, item):
        (ruleindex, position) = item
        Rule = self.Rules[ruleindex]
        print Rule.Nonterm[1],' >> ',
        for bindex in range(0, len(Rule.Body)):
            if position == bindex:
                print " (*) ",
            print Rule.Body[bindex][1],
        if position == len(Rule.Body):
            print " (*) "
        else:
            print

    # utility function -- returns true if an item is a final item
    def SLRItemIsFinal(self, item):
        (ruleindex, position) = item
        Rule = self.Rules[ruleindex]
        if position == len(Rule.Body):
            return 1
        else:
            return 0

    # dump the NFA
    def DumpSLRNFA(self):
        NFA = self.SLRNFA
        print "root: ", NFA.root_nonTerminal
        for key in NFA.StateTokenMap.keys():
            map = NFA.StateTokenMap[key]
            (fromstate, token) = key
            fromitem = NFA.States[ fromstate ][1]
            self.ItemDump(fromitem)
            print " on ", token[1], " maps "
            for Tostate in map:
                Toitem = NFA.States[Tostate][1]
                print "    ",
                self.ItemDump(Toitem)

    # compute DFA for ruleset by computing the E-closure of the
    # NFA
    def CompDFA(self):
        self.DFA = self.SLRNFA.Eclosure(NULLTOKEN)

    def DumpDFAsets(self):
        DFA = self.DFA
        print "root: ", DFA.root_nonTerminal
        for State in range(1, len(DFA.States) ):
            self.DumpItemSet(State)

    def DumpItemSet(self,State):
        DFA = self.DFA
        NFA = self.SLRNFA
        print
        print "STATE ", State, " *******"
        fromNFAindices = kjSet.get_elts(DFA.States[State][1])
        for NFAindex in fromNFAindices:
            item = NFA.States[NFAindex][1]
            print "  ", NFAindex, ": ",
            self.ItemDump(item)

    # this function completes the computation of an SLR DFA
    # by adding reduction states for each DFA state S containing
    # item   H > B.
    # which reduces rule H > B
    # for each token T in Follow of H.
    # if S already has a transition for T then there is a conflict!
    #
    # assumes DFA and SLRNFA and Follow have been computed.
    #
    def SLRFixDFA(self):
        DFA = self.DFA
        NFA = self.SLRNFA
        # look through the states (except 0=success) of the DFA
        # initially don't add any new states, just record
        # actions to be done
        #   uses convention that 0 is successful final state

        # ToDo is a dictionary which maps
        #     (State, Token) to a item to reduce
        ToDo = {}
        Error = None
        for State in range(1, len(DFA.States) ):
            # look for a final item for a rule in this state
            fromNFAindices = kjSet.get_elts(DFA.States[State][1])
            for NFAindex in fromNFAindices:
                item = NFA.States[NFAindex][1]
                # if the item is final remember to do the reductions...
                if self.SLRItemIsFinal(item):
                    (ruleindex, position) = item
                    Rule = self.Rules[ruleindex]
                    Head = Rule.Nonterm
                    Following = kjSet.Neighbors( self.Follow, Head )
                    for Token in Following:
                        key = (State, Token)
                        if not ToDo.has_key(key):
                            ToDo[ key ] = item
                        else:
                            # it might be okay if the items are identical?
                            item2 = ToDo[key]
                            if item != item2:
                                print "reduce/reduce conflict on ",key
                                self.ItemDump(item)
                                self.ItemDump(item2)
                            Error = " apparent reduce/reduce conflict"
                        #endif
                    #endfor
                #endif
            #endfor NFAindex
        #endfor State

        # for each (State,Token) pair which indicates a reduction
        # record the reduction UNLESS the map is already set for the pair
        for key in ToDo.keys():
            (State,Token) = key
            item = ToDo[key]
            (rulenum, dotpos) = item
            ExistingMap = DFA.map( State, Token )
            if ExistingMap[0] == NOMATCHFLAG:
                DFA.SetReduction( State, Token, rulenum )
            else:
                print "apparent shift/reduce conflict"
                print "reduction: ", key, ": "
                self.ItemDump(item)
                print "existing map ", ExistingMap
                Error = " apparent shift/reduce conflict"
        #endfor
        if Error and ABORTONERROR:
            raise NotSLRError, Error
    #enddef SLRfixDFA()

    # do complete SLR DFA creation starting after initialization
    def DoSLRGeneration(self):
        self.CompFirst()
        self.CompFollow()
        self.CompSLRNFA()
        self.CompDFA()
        self.SLRFixDFA()

#endclass ruleset


################ the following are interpretation functions
################ used by RULEGRAM meta grammar
# some constants used here
# regex for metagrammar comments: "##" to end of line
COMMENTFORM = "##.*\n"
# keyword that starts a rule declaration
RSKEY = "@R"
# keyword separating a rule name from its nonterminal (and root from rules)
COLKEY = "::"
# keyword separating a rule head from its body
LTKEY = ">>"
# name of the identifier terminal in the metagrammar
IDNAME = "ident"
# an identifier in the meta grammar is any nonwhite string
# except the keywords @R :: >> or comment flag ##
IDFORM = "[^" + string.whitespace + "]+"

# for identifiers simply return the string
def IdentFun(string):
    """Interpretation function for the ident terminal: the matched
    text is its own value."""
    return string

# RootReduction should receive list of form
#   [ nontermtoken, keyword COLKEY, RuleList ]
def RootReduction(list, ObjectGram):
    if len(list) != 3 or list[1] != COLKEY:
        raise FlowError, "unexpected metagrammar root reduction"
    return (list[0], list[2])

# NullRuleList should receive list of form
#   []
def NullRuleList(list, ObjectGram):
    if list != []:
        raise FlowError, "unexpected null RuleList form"
    return []

# FullRuleList should receive list of form
#   [ Rule, RuleList ]
def FullRuleList(list, ObjectGram):
    if type(list) != type([]) or len(list)!=2:
        raise FlowError, "unexpected full RuleList form"
    NewRule = list[0]
    OldRules = list[1]
    return [NewRule] + OldRules

# InterpRule should receive list of form
#   [keyword RSKEY,
#    RuleNameStr,
#    keyword COLKEY,
#    Nontermtoken,
#    keyword LTKEY,
#    Bodylist]
#
def InterpRule(list, ObjectGram):
    # check keywords:
    if len(list) != 6 or \
       list[0] != RSKEY or \
       list[2] != COLKEY or \
       list[4] != LTKEY:
        raise FlowError, "unexpected meta rule reduction form"
    ruleName = list[1]
    ruleNonterm = list[3]
    ruleBody = list[5]
    # upcase the the representation of keywords if needed
    if not ObjectGram.LexD.isCaseSensitive():
        for i in range(0,len(ruleBody)):
            (flag, name) = ruleBody[i]
            if flag == KEYFLAG:
                ruleBody[i] = (KEYFLAG, string.upper(name))
            elif not flag in (TERMFLAG, NONTERMFLAG):
                raise FlowError, "unexpected rule body member"
    rule = kjParser.ParseRule( ruleNonterm, ruleBody )
    rule.Name = ruleName
    return rule

# InterpRuleName should receive
#    [ string ]
def InterpRuleName(list, ObjectGram):
    """Reduction for RuleName >> ident: the rule name is simply the
    identifier string (no validation is performed here)."""
    return list[0]

# InterpNonTerm should receive
#    [ string ]
def InterpNonTerm(list, ObjectGram):
    #print list
    if type(list)!=type([]) or len(list)!=1:
        raise FlowError, "unexpected rulename form"
    Name = list[0]
    # determine whether this is a valid nonterminal
    if not ObjectGram.NonTermDict.has_key(Name):
        #print Name
        raise TokenError, "LHS of Rule must be nonterminal: "+Name
    return ObjectGram.NonTermDict[Name]

# NullBody should receive []
def NullBody(list, ObjectGram):
    #print list
    if list != []:
        raise FlowError, "unexpected null Body form"
    return []

# FullBody should receive
#  [ string, Bodylist]
# must determine whether the string represents
# a keyword, a nonterminal, or a terminal of the object
# grammar.
# returns (KEYFLAG, string) (TERMFLAG, string) or
#         (NONTERMFLAG, string) respectively
#
def FullBody(list,ObjectGram):
    #print list
    if type(list)!=type([]) or len(list)!=2:
        raise FlowError, "unexpected body form"
    Name = list[0]
    # Does the Name rep a nonterm, keyword or term
    # of the object grammar (in that order).
    if ObjectGram.NonTermDict.has_key(Name):
        kind = NONTERMFLAG
    elif ObjectGram.LexD.keywordmap.has_key(Name):
        kind = KEYFLAG
    elif ObjectGram.TermDict.has_key(Name):
        kind = TERMFLAG
    else:
        raise TokenError, "Rule body contains unregistered string: "+Name
    restOfBody = list[1]
    return [(kind, Name)] + restOfBody

# function to generate a grammar for parsing grammar rules
#
def ruleGrammar():
    """Construct and return the metagrammar used to parse rule
    declaration strings (the "@R name :: NT >> body" notation).
    Runs full SLR generation, so this does nontrivial work."""
    lexicon = kjParser.LexDictionary()
    # use SQL/Ansi style comments
    lexicon.comment( COMMENTFORM )
    # declare keywords
    RStart = lexicon.keyword( RSKEY )
    TwoColons = lexicon.keyword( COLKEY )
    LeadsTo = lexicon.keyword( LTKEY )
    # declare terminals
    ident = lexicon.terminal(IDNAME, IDFORM, IdentFun )
    # declare nonterminals
    Root = kjParser.nonterminal("Root")
    Rulelist = kjParser.nonterminal("RuleList")
    Rule = kjParser.nonterminal("Rule")
    RuleName = kjParser.nonterminal("RuleName")
    NonTerm = kjParser.nonterminal("NonTerm")
    Body = kjParser.nonterminal("Body")

    # declare rules
    #   Root >> NonTerm :: Rulelist
    InitRule = kjParser.ParseRule( Root,
                [NonTerm, TwoColons, Rulelist], RootReduction )
    #   Rulelist >> (empty)
    RLNull = kjParser.ParseRule( Rulelist, [], NullRuleList)
    #   Rulelist >> Rule Rulelist
    RLFull = kjParser.ParseRule( Rulelist, [Rule, Rulelist], FullRuleList)
    #   Rule >> @R RuleName :: NonTerm >> Body
    RuleR = kjParser.ParseRule( Rule,
       [RStart, RuleName, TwoColons, NonTerm, LeadsTo, Body],
       InterpRule)
    #   RuleName >> ident
    RuleNameR = kjParser.ParseRule( RuleName, [ident], InterpRuleName)
    #   NonTerm >> ident
    NonTermR = kjParser.ParseRule( NonTerm, [ident], InterpNonTerm)
    #   Body >> (empty)
    BodyNull = kjParser.ParseRule( Body, [], NullBody)
    #   Body >> ident Body
    BodyFull = kjParser.ParseRule( Body, [ident, Body], FullBody)

    # rule list and associated name->index dictionary: built together
    # so the indices cannot drift out of agreement
    namedRules = [ ("RLNull", RLNull), ("RLFull", RLFull),
                   ("RuleR", RuleR), ("RuleNameR", RuleNameR),
                   ("NonTermR", NonTermR), ("BodyNull", BodyNull),
                   ("BodyFull", BodyFull), ("InitRule", InitRule) ]
    Rules = []
    RuleDict = {}
    for index in range(len(namedRules)):
        (rulename, ruleobject) = namedRules[index]
        RuleDict[rulename] = index
        Rules.append(ruleobject)
    # make the RuleSet and compute the associated DFA
    RuleSet = ruleset( Root, Rules )
    RuleSet.DoSLRGeneration()
    # wrap everything up as a Grammar object
    return kjParser.Grammar( lexicon, RuleSet.DFA, Rules, RuleDict )

#enddef RuleGrammar()


# this is the rule grammar object for
# parsing; built once at module import time, so importing this module
# runs full SLR generation for the metagrammar (see the note at the
# top about loading a compiled representation instead)
RULEGRAM = ruleGrammar()

# a derived grammar class (object oriented programming is cool!)
# this is a compilable grammar for automatic parser generation.
#
class CGrammar(kjParser.Grammar):
    """A compilable grammar for automatic parser generation.

    Extends kjParser.Grammar (which also handles initialization) with
    methods to declare keywords, punctuations, comment forms and
    nonterminals, to accumulate rule text, and to Compile() the whole
    description into an SLR DFA via a meta-grammar.
    """

    def Keywords(self, Stringofkeys):
        """Insert a whitespace-separated list of keywords into the LexD.

        THIS SHOULD CHECK FOR KEYWORD/NONTERMINAL/PUNCT NAME
        COLLISIONS (BUT DOESN'T YET).
        """
        for word in string.split(Stringofkeys):
            self.LexD.keyword(word)

    def punct(self, Stringofpuncts):
        """Insert each character of Stringofpuncts into the LexD."""
        for mark in Stringofpuncts:
            self.LexD.punctuation(mark)

    def comments(self, listOfCommentStrings):
        """Register each regular-expression string as a comment form."""
        for pattern in listOfCommentStrings:
            self.LexD.comment(pattern)

    def Nonterms(self, StringofNonterms):
        """Register a whitespace-separated list of nonterminal names."""
        for name in string.split(StringofNonterms):
            self.NonTermDict[name] = kjParser.nonterminal(name)

    def Declarerules(self, StringWithRules):
        """Initialize or append to the accumulated RuleString."""
        self.RuleString = self.RuleString + "\n" + StringWithRules

    def Compile(self, MetaGrammar=RULEGRAM):
        """Parse self.RuleString with MetaGrammar and build the SLR DFA.

        Assumes NonTermDict, RuleString, LexD and TermDict have been
        set up properly (at least if the default MetaGrammar is used).
        On success fills in self.DFA, self.RuleL and
        self.RuleNameToIndex.
        """
        # DoParse1 returns (root nonterminal, rule list), with
        # punctuations of self.LexD interpreted as trivial keywords,
        # keywords of self.LexD as keywords, and names registered in
        # NonTermDict as nonterminals.
        (rootNonterm, ruleL) = MetaGrammar.DoParse1(self.RuleString, self)

        # build a rule set and compute its DFA
        compiledSet = ruleset(rootNonterm, ruleL)
        compiledSet.DoSLRGeneration()

        # record each rule's index to allow future reduction bindings
        position = 0
        for eachRule in ruleL:
            self.RuleNameToIndex[eachRule.Name] = position
            position = position + 1

        # install the results
        self.DFA = compiledSet.DFA
        self.RuleL = ruleL

        # keep the rule set around FOR DEBUG AND TESTING; the grammar
        # is deliberately NOT cleaned up here because the misc
        # structures are used in future bindings.
        self.Ruleset = compiledSet
    #enddef Compile

    def Reconstruct(self, VarName, Tofile, FName=None, indent=""):
        """Write a reconstructable Python representation to a file.

        EXCEPT: rule associations to reduction functions are lost
        (must be reset elsewhere) and terminals in the lexical
        dictionary are not initialized.  indent should be whitespace
        (add check!); FName, if given, wraps the emitted code in a
        function FName() returning the grammar object.

        NOTE: the underlying archiver violates information hiding --
        it "knows" the guts of the FSM and LexD classes.
        """
        archiver = codeReconstruct(VarName, Tofile, self, FName, indent)
        GrammarDumpSequence(archiver)
    #enddef Reconstruct

    def MarshalDump(self, Tofile):
        """Archive this grammar to an open (binary) file via marshal."""
        GrammarDumpSequence(marshalReconstruct(self, Tofile))

#endclass CGrammar

# general procedure for different types of archiving for grammars
def GrammarDumpSequence(ReconstructObj):
    """Drive a grammar-archiving object through its full dump protocol.

    ReconstructObj must be an initialized Reconstruct-style object
    (with the appropriate grammar attached).  The phases run in a
    fixed order: lexical data, then rules, then DFA transitions, and
    finally cleanup/flush.
    """
    phases = (ReconstructObj.PutLex,
              ReconstructObj.PutRules,
              ReconstructObj.PutTransitions,
              ReconstructObj.Cleanup)
    for phase in phases:
        phase()

# factory for an empty CGrammar shell (no lexicon, DFA or rules yet)
def NullCGrammar():
    """Return a fresh, completely blank CGrammar object."""
    blank = CGrammar(None, None, None, {})
    return blank

# utility classes -- Grammar reconstruction objects
#  encapsulate the process of grammar archiving.
#
class Reconstruct:
    """Abstract base ("virtual class") for grammar archivers.

    Holds only behavior common to the concrete archiving subclasses
    (codeReconstruct, marshalReconstruct).
    """

    def MakeTokenArchives(self):
        """Collect the DFA's tokens for archiving.

        Sets self.keys (DFA StateTokenMap keys), self.tokens (the
        distinct tokens, in kjSet element order) and self.tokInt
        (token -> index inverse dictionary).
        """
        mapKeys = self.Gram.DFA.StateTokenMap.keys()
        seen = kjSet.NewSet([])
        for entry in mapKeys:
            # entry is (state, token); only the token part is archived
            kjSet.addMember(entry[1], seen)
        tokenList = kjSet.get_elts(seen)
        inverse = {}
        position = 0
        for tok in tokenList:
            inverse[tok] = position
            position = position + 1

        self.keys = mapKeys
        self.tokens = tokenList # global sub
        self.tokInt = inverse # global sub

# grammar reconstruction to a file
class codeReconstruct(Reconstruct):
    """Archive a grammar by writing Python source code to a file.

    The emitted code rebuilds an equivalent Grammar object bound to
    the name VarName (wrapped in a function FName() returning the
    grammar, if FName is given).  Rule reduction functions and
    terminal regular expressions cannot be archived; Cleanup() writes
    warning comments listing the Bind()/Addterm() calls the consumer
    must still make.
    """

    def __init__(self, VarName, Tofile, Grammar, FName=None, indent =""):
        # do global subs for each of these
        self.Var = VarName
        self.File = Tofile
        self.FName = FName
        self.Gram = Grammar

        # put the reconstruction in a function if FName is given
        if FName != None:
            Tofile.write("\n\n")
            Tofile.write(indent+"def "+FName+"():\n")
            IND = indent+"   "
        else:
            IND = indent
        self.I = IND # global sub!
        # write the banner and the "null grammar" declaration which
        # the later phases incrementally fill in
        Tofile.write("\n\n")
        Tofile.write(IND+"# ******************************BEGIN RECONSTRUCTION\n")
        Tofile.write(IND+"# Python declaration of Grammar variable "+VarName+".\n")
        Tofile.write(IND+"# automatically generated by module "+PMODULE+".\n")
        Tofile.write(IND+"# Altering this sequence by hand will probably\n")
        Tofile.write(IND+"# leave it unusable.\n")
        Tofile.write(IND+"#\n")
        Tofile.write(IND+"import "+PMODULE+"\n\n")
        Tofile.write(IND+"# variable declaration:\n")
        Tofile.write(IND+VarName+"= "+PMODULE+".NullGrammar()\n\n")

        # make self.keys list of dfa keys,
        #      self.tokens list of grammar tokens,
        #      self.tokInt inverted dictionary for self.tokens
        self.MakeTokenArchives()

        # archive the keyword case-sensitivity setting
        Tofile.write("\n\n"+IND+"# case sensitivity behavior for keywords.\n")
        if self.Gram.LexD.isCaseSensitive():
            Tofile.write(IND+VarName+".SetCaseSensitivity(1)\n")
        else:
            Tofile.write(IND+VarName+".SetCaseSensitivity(0)\n")
    #enddef __init__

    def PutLex(self):
        """Write declarations for the lexical dictionary and tokens.

        Emits the punctuation list, comment patterns, and one
        IndexToToken assignment per token.  Terminals are emitted as
        bare termreps only -- they are NOT installed in the lexical
        dictionary here (see the warnings written by Cleanup).
        """
        IND = self.I
        Tofile = self.File
        VarName = self.Var
        LexD = self.Gram.LexD
        tokens = self.tokens

        Tofile.write("\n\n"+IND+"# declaration of lexical dictionary.\n")
        Tofile.write(IND+"# EXCEPT FOR TERMINALS\n")
        Tofile.write(IND+VarName+".LexD.punctuationlist = ")
        Tofile.write(`LexD.punctuationlist`+"\n")
        Tofile.write(IND+"# now comment patterns\n")
        for comment in LexD.commentstrings:
            Tofile.write(IND+VarName+".LexD.comment("+`comment`+")\n")
        Tofile.write(IND+"# now define tokens\n")
        for i in range(0,len(tokens)):
            tok = tokens[i]
            (kind, name) = tok
            if kind == TERMFLAG:
                # put warning at end!
                #  nonterminal not installed in lexical dictionary here!
                Tofile.write(IND+VarName+".IndexToToken["+`i`+"] = ")
                Tofile.write(PMODULE+".termrep("+`name`+")\n")
            elif kind == KEYFLAG:
                Tofile.write(IND+VarName+".IndexToToken["+`i`+"] = ")
                Tofile.write(VarName+".LexD.keyword("+`name`+")\n")
            elif kind == NONTERMFLAG:
                Tofile.write(IND+VarName+".IndexToToken["+`i`+"] = ")
                Tofile.write(PMODULE+".nonterminal("+`name`+")\n")
            else:
                # NOTE(review): FlowError is not defined or imported in
                # this module, so this raise would actually surface as a
                # NameError; presumably it should come from kjParser --
                # TODO confirm.
                raise FlowError, "unknown token type"
    #enddef PutLex

    def PutRules(self):
        """Write the rule list declarations and the DFA root nonterminal.

        Rule reduction functions are NOT initialized here (see the
        warnings written by Cleanup).
        """
        IND = self.I
        VarName = self.Var
        Rules = self.Gram.RuleL
        Tofile = self.File
        Root = self.Gram.DFA.root_nonTerminal
        Tofile.write("\n\n"+IND+"# declaration of rule list with names.\n")
        Tofile.write(IND+"# EXCEPT FOR INTERP FUNCTIONS\n")
        nrules = len(Rules)
        Tofile.write(IND+VarName+".RuleL = [None] * "+`nrules`+"\n")
        for i in range(0,nrules):
            # put warning at end:
            #  rule reduction function not initialized here!
            rule = Rules[i]
            name = rule.Name
            # `rule` relies on the ParseRule repr being evaluable code
            Tofile.write(IND+"rule = "+`rule`+"\n")
            Tofile.write(IND+"name = "+`name`+"\n")
            Tofile.write(IND+"rule.Name = name\n")
            Tofile.write(IND+VarName+".RuleL["+`i`+"] = rule\n")
            Tofile.write(IND+VarName+".RuleNameToIndex[name] = "+`i`+"\n")

        Tofile.write("\n\n"+IND+"# DFA root nonterminal.\n")
        Tofile.write(IND+VarName+".DFA.root_nonTerminal =")
        Tofile.write(`Root`+"\n")
    #enddef PutRules

    def PutTransitions(self):
        """Write DFA state declarations and transition setup calls.

        Each StateTokenMap entry becomes either a SetReduction or a
        SetMap call; only the first map element (TMap[0]) is archived.
        Tokens are referenced by their IndexToToken index so emitted
        code shares token objects.
        """
        IND = self.I
        Tofile = self.File
        VarName = self.Var
        maxState = self.Gram.DFA.maxState
        tokenToInt = self.tokInt
        StateTokenMap = self.Gram.DFA.StateTokenMap
        keys = self.keys

        Tofile.write("\n\n"+IND+"# DFA state declarations.\n")
        for state in range(1, maxState+1):
            Tofile.write(IND+VarName+".DFA.States["+`state`+"] = ")
            Tofile.write('['+`TRANSFLAG`+']\n')
        Tofile.write(IND+VarName+".DFA.maxState = "+`maxState`+"\n")

        Tofile.write("\n\n"+IND+"# DFA transition declarations.\n")
        for key in keys:
            (fromState, TokenRep) = key
            TokenIndex = tokenToInt[TokenRep]
            TokenArg = VarName+".IndexToToken["+`TokenIndex`+"]"
            TMap = StateTokenMap[key]
            TMaptype = TMap[0][0]
            if TMaptype == REDUCEFLAG:
                # reduction
                rulenum = TMap[0][1]
                Args = "("+`fromState`+","+TokenArg+","+`rulenum`+")"
                Tofile.write(IND+VarName+".DFA.SetReduction"+Args+"\n")
            elif TMaptype == MOVETOFLAG:
                # MoveTo
                Args = "("+`fromState`+","+TokenArg+","+`TMap[0][1]`+")"
                Tofile.write(IND+VarName+".DFA.SetMap"+Args+"\n")
            else:
                # NOTE(review): FlowError is not defined or imported in
                # this module (would surface as a NameError) -- TODO
                # confirm it should come from kjParser.
                raise FlowError, "unexpected else (2)"
    #enddef

    def Cleanup(self):
        """Finish the emitted code: CleanUp() call, optional return,
        and warning comments.

        The warnings list the rule names that must be Bound to
        reduction functions and the terminals that must be Addterm'd
        with regular expressions (EOF excluded).
        """
        Tofile = self.File
        RuleL = self.Gram.RuleL
        tokens = self.tokens
        VarName = self.Var
        IND = self.I
        FName = self.FName

        Tofile.write("\n\n"+IND+"# Clean up the grammar.\n")
        Tofile.write(IND+VarName+".CleanUp()\n")

        # if the Fname was given return the grammar as function result
        if FName != None:
            Tofile.write("\n\n"+IND+"# return the grammar.\n")
            Tofile.write(IND+"return "+VarName+"\n")

        Tofile.write("\n\n"+IND+"# WARNINGS ****************************** \n")
        Tofile.write(IND+"# You must bind the following rule names \n")
        Tofile.write(IND+"# to reduction interpretation functions \n")
        for R in RuleL:
            Tofile.write(IND+"# "+VarName+".Bind("+`R.Name`+", ??function??)\n")
        Tofile.write(IND+"#(last rule)\n")

        Tofile.write("\n\n"+IND+"# WARNINGS ****************************** \n")
        Tofile.write(IND+"# You must bind the following terminals \n")
        Tofile.write(IND+"# to regular expressions and interpretation functions \n")
        warningPrinted = 0
        for tok in tokens:
            (kind, name) = tok
            if kind == TERMFLAG and tok != ENDOFFILETOKEN:
                Tofile.write(IND+"# "+VarName+\
                  ".Addterm("+`name`+", ??regularExp??, ??function??)\n")
                warningPrinted = 1
        if not warningPrinted:
            Tofile.write(IND+"#  ***NONE** \n")
        Tofile.write(IND+"#(last terminal)\n")
        Tofile.write(IND+"# ******************************END RECONSTRUCTION\n")
    #enddef
#endclass

# reconstruction using marshalling to a file
#  encodes internal structures for grammar using marshal-able
#  objects.  Final marshalling to the file is done at CleanUp()
#  storing one big tuple.
#
class marshalReconstruct(Reconstruct):

    def __init__(self, Grammar, Tofile):
        self.Gram = Grammar
        self.File = Tofile
        # should archive self.tokens structure
        self.MakeTokenArchives()
        # archive this
        self.CaseSensitivity = Grammar.LexD.isCaseSensitive()

    def PutLex(self):
        LexD = self.Gram.LexD
        # archive these
        self.punct = LexD.punctuationlist
        self.comments = LexD.commentstrings

    def PutRules(self):
        # archive this
        self.Root = self.Gram.DFA.root_nonTerminal
        # make a list of tuples that can be used with
        # rule = apply(ParseRule, tuple[1])
        # rule.Name = tuple[0]
        Rules = self.Gram.RuleL
        nrules = len(Rules)
        RuleTuples = [None] * nrules
        for i in range(nrules):
            rule = Rules[i]
            RuleTuples[i] = (rule.Name, rule.components())
        #archive this
        self.RuleTups = RuleTuples

    def PutTransitions(self):
        keys = self.keys
        tokenToInt = self.tokInt
        StateTokenMap = self.Gram.DFA.StateTokenMap

        # archive this
        self.MaxStates = self.Gram.DFA.maxState

        # create two lists,
        #   one for reductions with contents (fromState, tokennumber, rulenum)
        #   one for movetos with contents (fromstate, tokennumber, tostate)
        #      (note: token number not token itself to allow sharing)
        # to allow arbitrary growing, first use dicts:
        reductDict = {}
        nreducts = 0
        moveToDict = {}
        nmoveTos = 0
        for key in self.keys:
            (fromState, TokenRep) = key
            TokenIndex  = tokenToInt[TokenRep]
            TMap = StateTokenMap[key]
            TMaptype = TMap[0][0]
            if TMaptype == REDUCEFLAG:
                rulenum = TMap[0][1]
                reductDict[nreducts] = (fromState, TokenIndex, rulenum)
                nreducts = nreducts + 1
            elif TMaptype == MOVETOFLAG:
                ToState = TMap[0][1]
                moveToDict[nmoveTos] = (fromState, TokenIndex, ToState)
                nmoveTos = nmoveTos + 1
            else:
                raise FlowError, "unexpected else"
        #endfor
        # translate dicts to lists
        reducts = [None] * nreducts
        for i in range(nreducts):
            reducts[i] = reductDict[i]
        moveTos = [None] * nmoveTos
        for i in range(nmoveTos):
            moveTos[i] = moveToDict[i]

        # archive these
        self.reducts = reducts
        self.moveTos = moveTos

    # this is the function that does the marshalling
    def Cleanup(self):
        import marshal
        # make the big list to marshal
        BigList = [None] * 9
        BigList[0] = self.tokens
        BigList[1] = self.punct
        BigList[2] = self.comments
        BigList[3] = self.RuleTups
        BigList[4] = self.MaxStates
        BigList[5] = self.reducts
        BigList[6] = self.moveTos
        BigList[7] = self.Root
        BigList[8] = self.CaseSensitivity
        # dump the big list to the file
        marshal.dump( BigList, self.File )

#end class

#######################testing stuff
if RUNTESTS:
    # Regression tests: executed at import time, only when the
    # module-level RUNTESTS flag is set.

    # trivial interpretation function reused by several terminals/rules
    def echo(x): return x

    # simple grammar stolen from a text
    LD0 = kjParser.LexDictionary()
    id = LD0.terminal("id","id",echo)
    plus = LD0.punctuation("+")
    star = LD0.punctuation("*")
    oppar = LD0.punctuation("(")
    clpar = LD0.punctuation(")")
    equals = LD0.punctuation("=")
    E = kjParser.nonterminal("E")
    T = kjParser.nonterminal("T")
    Tp = kjParser.nonterminal("Tp")
    Ep = kjParser.nonterminal("Ep")
    F = kjParser.nonterminal("F")
    rule1 = kjParser.ParseRule( E, [ T, Ep ] )
    rule2 = kjParser.ParseRule( Ep, [ plus, T, Ep ] )
    rule3 = kjParser.ParseRule( Ep, [ ] )
    rule4 = kjParser.ParseRule( T, [ F, Tp ] )
    rule5 = kjParser.ParseRule( Tp, [ star, F, Tp ] )
    rule6 = kjParser.ParseRule( Tp, [ ] )
    rule7 = kjParser.ParseRule( F, [ oppar, E, clpar ] )
    rule8 = kjParser.ParseRule( F, [ id ] )

    rl0 = [ rule1, rule2, rule3, rule4, rule5, rule6, rule7,rule8]
    # exercise each stage of SLR generation individually, keeping the
    # intermediate results in globals for inspection
    rs0 = ruleset(E, rl0)
    rs0.CompFirst()
    Firstpairs = kjSet.GetPairs(rs0.First)
    rs0.CompFollow()
    Followpairs = kjSet.GetPairs(rs0.Follow)
    rs0.CompSLRNFA()
    NFA0 = rs0.SLRNFA
    rs0.CompDFA()
    rs0.SLRFixDFA()
    DFA0 = rs0.DFA

    # bare attribute holder used as a test fixture
    class dummy: pass
    ttt0 = dummy()

    # Parse STRING with the given DFA and rule list; all intermediate
    # objects are stashed on ttt for post-mortem inspection.
    def TESTDFA( STRING , ttt, DFA, Rulelist, DOREDUCTIONS = 1):
        ttt.STRING = STRING
        #ttt.List = kjParser.LexList(LD0, ttt.STRING)
        ttt.Stream = kjParser.LexStringWalker( ttt.STRING, LD0 )
        ttt.Stack = {-1:0}# Walkers.SimpleStack()
        ttt.ParseObj = kjParser.ParserObj( Rulelist, \
                           ttt.Stream, DFA, ttt.Stack,DOREDUCTIONS)
        ttt.RESULT = ttt.ParseObj.GO()
        #ttt.Stack.Dump(10)
        return ttt.RESULT

    # convenience wrapper binding the first grammar's fixtures
    def TESTDFA0( STRING , DOREDUCTIONS = 1):
        return TESTDFA( STRING, ttt0, DFA0, rl0, DOREDUCTIONS )

    TESTDFA0( " id + id * id ")

    # an even simpler grammar
    S = kjParser.nonterminal("S")
    M = kjParser.nonterminal("M")
    A = kjParser.nonterminal("A")
    rr1 = kjParser.ParseRule( S, [M] )
    #rr2 = kjParser.ParseRule( A, [A, plus, M])
    #rr3 = kjParser.ParseRule( A, [M], echo)
    #rr4 = kjParser.ParseRule( M, [M, star, M])
    rr5 = kjParser.ParseRule( M, [oppar, M, clpar])
    rr6 = kjParser.ParseRule( M, [id])
    rl1 = [rr1,rr5,rr6]
    rs1 = ruleset(S, rl1)
    rs1.CompFirst()
    rs1.CompFollow()
    rs1.CompSLRNFA()
    rs1.CompDFA()
    rs1.SLRFixDFA()
    DFA1 = rs1.DFA

    ttt1=dummy()

    def TESTDFA1( STRING , DOREDUCTIONS = 1):
        return TESTDFA( STRING, ttt1, DFA1, rl1, DOREDUCTIONS )

    # a grammar with a null production
    X = kjParser.nonterminal("X")
    Y = kjParser.nonterminal("Y")
    RX = kjParser.ParseRule( X, [ oppar, Y, clpar ] )
    RY = kjParser.ParseRule( Y, [] )
    rl2 = [RX,RY]
    rs2 = ruleset(X, rl2)
    rs2.CompFirst()
    rs2.CompFollow()
    rs2.CompSLRNFA()
    rs2.CompDFA()
    rs2.SLRFixDFA()
    DFA2 = rs2.DFA

    ttt2 = dummy()
    def TESTDFA2( STRING, DOREDUCTIONS = 1):
        return TESTDFA( STRING, ttt2, DFA2, rl2, DOREDUCTIONS )

    # the following grammar should fail to be slr
    # (Aho,Ullman p. 213)

    S = kjParser.nonterminal("S")
    L = kjParser.nonterminal("L")
    R = kjParser.nonterminal("R")
    RS1 = kjParser.ParseRule( S, [L, equals, R] )
    RS2 = kjParser.ParseRule( S, [R], echo )
    RL1 = kjParser.ParseRule( L, [star, R])
    RL2 = kjParser.ParseRule( L, [id])
    RR1 = kjParser.ParseRule( R, [L] )
    rs3 = ruleset(S, [RS1,RS2,RL1,RL2,RR1])
    rs3.CompFirst()
    rs3.CompFollow()
    rs3.CompSLRNFA()
    rs3.CompDFA()
    #rs3.SLRFixDFA() # should fail and does.

    # testing RULEGRAM: build a CGrammar by hand and parse a small
    # rule description with the bootstrap meta-grammar
    ObjG = NullCGrammar()
    ObjG.Addterm("id","id",echo)
    ObjG.Nonterms("T E Ep F Tp")
    ObjG.Keywords("begin end")
    ObjG.punct("+*()")
    ObjG.comments(["--.*\n"])
    # PROBLEM WITH COMMENTS???
    Rulestr = """
      ## what a silly grammar!
      T ::
      @R One :: T >> begin E end
      @R Three :: E >>
      @R Two :: E >> E + T
      @R Four :: E >> ( T )
      """
    RL = RULEGRAM.DoParse1( Rulestr, ObjG )
